************************************************************
** Merge to Dividend Records 
************************************************************

* Get referring records
* Builds working/referring_records: one row per (city, booknum, ref_account),
* where ref_account is an account number found in the see_record field or in
* one of two phrasings in the remarks field of the same record.
use working/family_search_full, clear

* Turn every separator used in see_record into a single delimiter so that
* split yields one piece per listed account.
gen _temp = see_record 
foreach p in "," "-" "&" {
	replace _temp = subinstr(_temp , "`p'", "|",.)
}

split _temp, parse("|")
* split reports how many piece variables it created. Loop over exactly that
* many instead of a fixed count, so that no piece is silently ignored and no
* non-existent piece variable is referenced.
local n_pieces = r(nvars)

* First run of digits in each piece becomes a referenced account number
forval x = 1/`n_pieces' {
	gen ref_account`x' = real(regexs(1)) if regexm(_temp`x',"([0-9]+)")
}

* The remark rules below write into ref_account1, so it has to exist even if
* see_record produced no pieces at all.
capture confirm variable ref_account1
if _rc {
	gen ref_account1 = .
}

* NOTE(review): both remark rules overwrite ref_account1, so a number already
* taken from see_record is replaced when the remarks also match - confirm
* that the remarks are meant to take precedence.
replace ref_account1 = real(regexs(1)) if regexm(lower(remarks),"trans[a-z\.]+ from ([0-9]+)")
replace ref_account1 = real(regexs(3))  if regexm(lower(remarks) , "see (record | account )?(no|no\.|#) ([0-9]+)")

* NOTE(review): a row whose first piece holds no digits is dropped here even
* if a later piece does hold an account number - confirm this is intended.
keep if !mi(ref_account1)
keep city booknum ref_account*

sort city booknum
duplicates drop

* Go long: one row per referenced account, then discard the helper columns
gen x=_n
reshape long ref_account, i(x) j(record)
drop if mi(ref_account)
drop x record
duplicates drop

save working/referring_records, replace




* Merge to the dividend records
* Read the raw dividend file and reduce it to one row per city and account,
* stored under the key name booknum used by the other working files.
clear
import delimited using data/dividend_records.csv, delimiters(",") varnames(1)
keep city account_number amount
rename account_number booknum

* Drop duplicates: within a city/account, keep only the row that sorts last
* on amount
bysort city booknum (amount): drop if _n < _N

save working/dividend_records, replace

* Match each dividend row to the referring records that share its city and
* booknum, then re-key the matched copy under the referenced account number.
merge 1:m city booknum using working/referring_records, keep(match) nogenerate
drop booknum
rename ref_account booknum

* Flag the re-keyed copies (1) and stack them on the original rows (0)
generate record_copy = 1
append using working/dividend_records
replace record_copy = 0 if missing(record_copy)

* Drop duplicates: per city/booknum keep the row that sorts last on
* (amount, record_copy)
bysort city booknum (amount record_copy): drop if _n != _N


* This already has the demographic information embedded
* For consistency, this should probably be re-calculated or separated out
* Every row of the categorized records is kept, matched or not; in_div_rec
* marks the rows that found a dividend record.
merge 1:m city booknum using working/categorized_valid_sequential_records, keep(using match)
generate in_div_rec = (_merge == 3)
drop _merge

* Preserve only the cities for which we have dividend records
tempfile matched
save `matched'

* One row per city present in the dividend file, used as a filter
use city using working/dividend_records, clear
bysort city: keep if _n == 1
merge 1:m city using `matched', keep(match) nogenerate


* Attach the tagged variables from data/residence_tagged, data/wbu_tagged and
* data/bp_tagged, keyed on fs_pr_ark. Rows without a match are kept, and only
* the variables carrying the matching prefix are brought in.
foreach src in residence wbu bp {
	merge 1:1 fs_pr_ark using data/`src'_tagged, keep(match master) keepusing(`src'_*) nogenerate
}

compress
save working/dividend_matched_records, replace

* Reload the matched records and build an eight-group quantile age variable
use working/dividend_matched_records, clear

* Top-code non-missing ages at 100
replace age_in_years = 100 if age_in_years >= 100 & age_in_years < .
xtile age_range = age_in_years, nquantiles(8)

* Label each group with the lowest and highest age observed inside it
label define age_range, replace
forvalues grp = 1/8 {
	summarize age_in_years if age_range == `grp', meanonly
	local lo = r(min)
	local hi = r(max)

	label define age_range `grp' "`lo'-`hi'", add
}

label values age_range age_range

* The following were pre-calculated. I want to re-calculate them just in case
drop pre_panic_acct post_panic_acct post_window 

* New accounts dated 1 Jan 1873 - 17 Sep 1873 and 18 Sep 1873 - 2 Jul 1874
generate pre_panic_acct = (new_account == 1) & inrange(date, td(01jan1873), td(17sep1873))
generate post_panic_acct = (new_account == 1) & inrange(date, td(18sep1873), td(02jul1874))

* Any record dated on or after 1 Jan 1873
generate post_window = (date >= td(01jan1873))

* An account counts as closed when it has no matching dividend record
generate closed_account = (in_div_rec == 0)


* Fill in defaults for race and job category
replace white = 0 if missing(white)
encode job, generate(job_category)
replace job_category = 0 if missing(job_category)
label define job_category 0 "unclassified", add
generate byte missing_job_info = missing(occupation)

* State level: born (bp) or brought up (wbu) in state. The result is missing
* only when both source flags are missing.
generate bis = real(bp_in_state)
generate buis = real(wbu_in_state)
generate local = (bis == 1 | buis == 1) if !(missing(bis) & missing(buis))

* County level: same construction
generate bic = real(bp_in_county)
generate buic = real(wbu_in_county)
generate county_local = (bic == 1 | buic == 1) if !(missing(bic) & missing(buic))

* Residence: defined only for the two recognised location values
generate in_town = (residence_location == "In Town") if inlist(residence_location, "In Town", "Out of Town")

* Variable labels
label variable organization "Organization"
label variable white "White"
label variable brought_up_in_state "Raised in State"
label variable born_in_state "Born in State"
label variable local "Born/Raised in State"
label variable in_town "Residence In-Town"

* Weekly date of each record, and the number of weeks from that week to the
* week containing 2 Jul 1874. rtime holds it for closed accounts only and
* ltime for the remaining accounts.
gen yearw = wofd(date)
format yearw %tw
gen time = wofd(mdy(7,2,1874)) - yearw 
gen rtime = time if closed_account==1
gen ltime = time if closed_account==0

* Records from 1872 onwards. A missing date also passes this test, because a
* missing value compares as larger than any number.
keep if year(date)>=1872


* Drop cities which do not have contiguous records up to the failure.
drop if inlist(city, "Tallahassee", "Charleston")
* Drop discontiguous 1872 Beaufort records
drop if city=="Beaufort" & date < mdy(1,14 ,1873)

* Keep new accounts only. The explicit comparison matters: a bare
* "if new_account" is also true for missing values, which would keep records
* whose new-account status is unknown and would disagree with the
* new_account == 1 test used for the pre/post panic flags.
keep if new_account == 1

* Monthly date of each record
gen yearm = mofd(date)
format yearm %tmm-CY


* Assign each record to a numbered date regime. Regime 0 starts on
* 1 Jan 1872; each break date closes the current regime on the day before it
* and opens the next one. Dates on or after the last break stay missing.
capture drop regime
generate regime = .

local regime_id = 0
local start = mdy(1,1,1872)
local breaks mdy(9,21,1873) mdy(12,26,1873) mdy(4,10,1874) mdy(8,1,1874)

foreach date_break of local breaks {
	display %tdm_D,_CY `date_break'
	replace regime = `regime_id' if inrange(date, `start', `date_break' - 1)
	local start = `date_break'
	local ++regime_id
}


* Quantile-based age groups: put missing ages in group 0 and flag them
replace age_range = 0 if mi(age_range)
gen missing_age = age_range==0

* Generate new age range
* The quantile version is kept under the name age_range_even, with its own
* copy of the value label.
rename age_range age_range_even
label copy age_range age_range_even
la val age_range_even age_range_even

* Fixed age bands. The intervals are half-open so that each age belongs to
* exactly one band: 40 is not claimed by two bands, and a non-integer age
* such as 17.5 does not fall between bands into group 0. A missing age fails
* every upper-bound test and stays in group 0.
gen age_range = 0
replace age_range = 1 if age_in_years >= 0  & age_in_years < 18
replace age_range = 2 if age_in_years >= 18 & age_in_years < 26
replace age_range = 3 if age_in_years >= 26 & age_in_years < 40
replace age_range = 4 if age_in_years >= 40 & age_in_years <= 100

* Start from a clean label definition, then label each of the four bands with
* the lowest and highest age observed in it. Bands without observations are
* skipped so that no ".-." label is created.
capture label drop age_range2
forval x = 1/4 {
	qui sum age_in_years if age_range == `x'
	if r(N) > 0 {
		local age_min = r(min)
		local age_max = r(max)
		la define age_range2 `x' "`age_min'-`age_max'", add
	}
}
la val age_range age_range2
la var age_range "Age"

* Label every regime with the first and last record date observed in it
quietly summarize regime
local maxval = r(max)

capture label drop regime
forvalues k = 0/`maxval' {
	quietly summarize date if regime == `k'
	local first = r(min)
	local last = r(max)

	* Echo the range, then store the same text as the value label
	display "`k' " %tdN/D/Y `first' " - " %tdN/D/Y `last'
	local span : display %tdN/D/Y `first' " - " %tdN/D/Y `last'
	* Month of the first record in the regime, kept in a numbered local
	local refline`k' = mofd(`first')
	label define regime `k' "`span'", add
}

label values regime regime
label variable regime "Dates"
label variable closed_account "Account Closure"

* Value labels for the race and locality indicators
label variable white "Race"
label define white 1 "White" 0 "Nonwhite", replace
label define local 1 "Local" 0 "Nonlocal", replace
label values white white
label values local local

* Numeric job-education variable je, coded with a fixed High/Medium/Low order
label define je 1 "High" 2 "Medium" 3 "Low", replace
encode job_education, generate(je) label(je)
label variable je "Job Education"

* Discard records dated July 1874 or later, and records without a date
drop if mofd(date) >= ym(1874,7)


* Replace the string job_education with a numeric version under the same
* name; records without a value are coded 0
rename job_education job_education_str
label define job_education 1 "High" 2 "Medium" 3 "Low"

encode job_education_str, generate(job_education) label(job_education)
replace job_education = 0 if missing(job_education)

* Final variable labels
label variable job_education "Job Education"
label variable age_range "Age"
label variable in_town "Residence In Town"
label variable white "White"

* Record which rows lack residence or locality information, then code the
* missing values themselves as 0
generate missing_in_town = missing(in_town)
replace in_town = 0 if missing(in_town)

generate missing_local = missing(local)
replace local = 0 if missing(local)

* Organization accounts: every missing-information flag is switched on and
* every personal characteristic is set to 0
foreach flag of varlist missing_in_town missing_job_info missing_age missing_local {
	replace `flag' = 1 if organization
}

foreach trait of varlist white local in_town age_range job_education {
	replace `trait' = 0 if organization
}
label variable local "Local"


save working/weekly_closure_test_data, replace
